#### "Gender Inequality and Authoritarian Regimes" ####
# authors: "Lars Pelke"
# date: 2020-01-20

# clear workspace
rm(list=ls())

#libraries
library(tidyverse)
library(ggpubr)
library(panelr)
library(readstata13)
library(texreg)
library(dotwhisker)
library(broom.mixed)
library(countrycode)

library(lme4)
library(sjPlot)
library(sjmisc)
library(lfe)

library(MCMCpack)

# set working directory
setwd("M:/Promotion/Paper/Gender Inequality/calculations/data/rawdata")

##Import VDEM DATA
vdem <- read_csv("vdem_9/V-Dem-CY-Full+Others-v9.csv") 
vdem <- vdem %>%
  rename(cown = COWcode)

vdem$cown <- countrycode(vdem$country_name, "country.name", "cown", warn = TRUE)

##Importatn Wahman, Teorell and Hadenius data ##
library(democracyData)

wth <- wahman_teorell_hadenius

wth$cown <- countrycode(wth$wahman_teorell_hadenius_country, "country.name", "cown", warn = TRUE)

## Import EPR data ##

eprlong <- read.csv("epr/EPR-2018.1.1.csv", as.is = T) %>%
  rename(country = statename) %>%
  arrange(country)

# compare country names in EPR to dataset
setdiff(ds.fullraw$country, eprlong$country)

# recode differing country names
eprlong$country[eprlong$country == "Czech Republic"] <- "Czechia"
eprlong$country[eprlong$country == "Republic of Korea"] <- "South Korea"
eprlong$country[eprlong$country == "Serbia" | 
                  eprlong$country == "Serbia and Montenegro"] <- "Yugoslavia/Serbia"

# transform into country-year data
eprlongcy <- eprlong %>% 
  rowwise() %>%
  do(data.frame(country = .$country,
                from = .$from,
                to = .$to,
                year = seq(.$from, .$to, by = 1),
                group = .$group,
                gwgroupid =.$gwgroupid,
                umbrella = .$umbrella,
                size = .$size,
                status = .$status)) %>%
  arrange(country, year)

# reduce dataset
epr <- eprlongcy %>%
  filter(year >= 1964)

# add country-year variable
epr$countryyear <- paste(epr$country, epr$year)

# create exclusion dummy variable
epr <- epr %>%
  mutate(exclusion = if_else(status == "DISCRIMINATED"  | 
                               status == "POWERLESS" |
                               status == "SELF-EXCLUSION", 1, 0))

# create exclusion ratio variable
epr <- epr %>%
  group_by(country, year) %>%
  summarise(eprratio = sum(size[exclusion == 1]))

epr$cown <- countrycode(epr$country, "country.name", "cown", warn = TRUE)

epr <- epr %>%
  ungroup()%>%
  dplyr:: select(cown, year, eprratio)

#### Create final basic dataset ####

vdem <- vdem %>%
  left_join(wth, c("cown", "year"))

vdem <- vdem %>%
  left_join(epr, by = c("cown", "year"))
rm(epr, eprlong, eprlongcy)

#### Filter authoritarian regimes based on Hadenius et al. ####

vdem <- vdem %>%
  filter(year >=1970) %>%
  filter(regime1ny %in% c(1, 2, 3, 4, 9)) # 3751 country-years that are an autocracy

vdem <- vdem %>%
  mutate(wth_regime = case_when(regime1ny==1 ~ "monarchy", 
                                regime1ny==2 ~ "military", 
                                regime1ny==3 ~ "one-party", 
                                regime1ny==4 ~ "multi-party", 
                                regime1ny==9 ~ "no-party"))

table(vdem$wth_regime)

# military    monarchy multi-party    no-party   one-party 
# 972         453        1479          36         811 

#### Analysis of (political) exclusion by gender ####

#### Building and reshaping Variables ####

vdem <- vdem %>%
  dplyr::select(cown, country_name, country_id, year, wth_regime, eprratio, e_mipopula, e_wb_pop, 
                e_migdppcln, starts_with("e_total"), v2lgfemleg , v2elfemrst, e_pefeliex, v2lgqugen, v2elloelsy, 
                v2elparlel, starts_with("v2exl_legitideolcr"), v2exl_legitideol, v2exl_legitlead, v2exl_legitperf, 
                v2exl_legitratio, v2pepwrgen, v2peasjgen, v2peasbgen, v2xpe_exlgender, starts_with("v2elmulpar"), 
                v2xps_party, v2psorgs, v2psprbrch, v2psprlnks, v2psplats, v2pscohesv, v2x_regime, v2clgencl, 
                v2peapsgen,v2peasjgen, v2peasbgen)

## generate variables ##

vdem <- vdem %>%
  mutate(e_wb_pop_ln = log10(e_wb_pop))

vdem <- vdem %>% 
  mutate(v2lgqugen = ifelse(v2lgqugen>=1, 1, 0))


## Rescale Natural Resource Variable ##

vdem <- vdem %>%
  mutate(e_total_resources_income_pc = e_total_resources_income_pc/1000)

summary(vdem$e_total_resources_income_pc)

#### Bayesian Factor Analysis for Input and Output Dimension of Gender Inclusion #####

#gender exclusion index
summary(vdem$v2xpe_exlgender)

## Input Dimension ##
# power distributed by gender: v2pepwgen
# equality in respect for civil liberties by gender: v2clgencl

posterior_vdem_input <- MCMCfactanal(~v2pepwrgen + v2clgencl, factors=1,
                                     verbose=0, store.scores=FALSE, a0=1, b0=0.15,
                                     data=vdem, burnin=1000, mcmc=20000, thin=1)

## First, the loadings (Lambda). ##

loadings.input <- summary(posterior_vdem_input)$statistics[1:2]
loadings.input
ci.loadings.input <- summary(posterior_vdem_input)$quantiles[1:2,c(1,5)]
ci.loadings.input

## Second, Uniqueness ##
uniquenesses.input <- data.frame(summary(posterior_vdem_input)$statistics[3:4])
names(uniquenesses.input)[1] <- "uniquenesses"
uniquenesses.input

## extract the point estimates from the Bayesian factor analysis

posterior_vdem_input <- MCMCfactanal(~v2pepwrgen + v2clgencl, factors=1,
                                     verbose=0, store.scores=TRUE, a0=1, b0=0.15,
                                     data=vdem, burnin=1000, mcmc=20000, thin=1)

loadings.input.df <- summary(posterior_vdem_input)$statistics

loadings.input.df <- loadings.input.df[5:3515,]

loadings.input.df <- as_tibble(loadings.input.df) %>%
  dplyr::select(Mean, SD) %>%
  rename(gender_inclusion_input_mean = Mean, 
         gender_inclusion_input_sd = SD)

vdem_input <- vdem %>%
  drop_na(v2pepwrgen, v2clgencl) %>%
  dplyr::select(country_name, year, cown, v2pepwrgen, v2clgencl,v2xpe_exlgender)

vdem_input <- vdem_input %>%
  bind_cols(loadings.input.df)

## Output Dimension##
# access to public serives by gender (v2peapsgen)
# access to state jobs by gender (v2peasjgen)
# access to state business opportunities by gender (v2peasbgen)

posterior_vdem_output <- MCMCfactanal(~v2peapsgen + v2peasjgen + v2peasbgen, factors=1,
                                      verbose=0, store.scores=FALSE, a0=1, b0=0.15,
                                      data=vdem, burnin=1000, mcmc=20000, thin=1)

## First, the loadings (Lambda). ##

loadings.output <- summary(posterior_vdem_output)$statistics[1:3]
loadings.output
ci.loadings.output <- summary(posterior_vdem_output)$quantiles[1:3,c(1,5)]
ci.loadings.output

## Second, Uniqueness ##
uniquenesses.output <- data.frame(summary(posterior_vdem_output)$statistics[4:6])
names(uniquenesses.output)[1] <- "uniquenesses"
uniquenesses.output

## extract the point estimates from the Bayesian factor analysis

posterior_vdem_output <- MCMCfactanal(~v2peapsgen + v2peasjgen + v2peasbgen, factors=1,
                                      verbose=0, store.scores=TRUE, a0=1, b0=0.15,
                                      data=vdem, burnin=1000, mcmc=20000, thin=1)

loadings.output.df <- summary(posterior_vdem_output)$statistics

loadings.output.df2 <- loadings.output.df[7:3498,]

loadings.output.df2 <- as_tibble(loadings.output.df2) %>%
  dplyr::select(Mean, SD) %>%
  rename(gender_inclusion_output_mean = Mean, 
         gender_inclusion_output_sd = SD)

vdem_output <- vdem %>%
  drop_na(v2peapsgen, v2peasjgen, v2peasbgen) %>%
  dplyr::select(country_name, year, cown, v2peapsgen, v2peasjgen, v2peasbgen, v2xpe_exlgender)

vdem_output <- vdem_output %>%
  bind_cols(loadings.output.df2)

## Merge datasets ##

vdem_input <- vdem_input %>%
  dplyr::select(country_name, year, cown, gender_inclusion_input_mean, gender_inclusion_input_sd)

vdem_output<- vdem_output %>%
  dplyr::select(country_name, year, cown, gender_inclusion_output_mean, gender_inclusion_output_sd)

vdem <- vdem %>%
  left_join(vdem_input, by = c("country_name", "year", "cown"))

vdem <- vdem %>%
  left_join(vdem_output, by = c("country_name", "year", "cown"))

rm(posterior_vdem_input, posterior_vdem_output)

#### Regime Types Changes within countries ####

vdem_reg_change <- vdem %>%
  group_by(country_name) %>%
  mutate(reg_change = ifelse(wth_regime != lag(wth_regime), 1, 0)) %>%
  dplyr::select(country_name, year, wth_regime, reg_change)

table(vdem_reg_change$reg_change) # 180 regime change within authoritarian countries

#### Generate Sample ####

vdem <- distinct(vdem, country_id, year, .keep_all= TRUE)

# delete those countries with less than 3 country-year observations to dela with singular fit 
vdem <- vdem %>% # delete those countries with less than 3 country-year observations to dela with singular fit 
  dplyr::group_by(cown) %>%
  mutate(num_years = max(year) - min(year)) %>%
  filter(num_years >= 5) %>%
  ungroup(cown)

saveRDS(vdem, file = "calculations/data/vdem_wth_merged.rds")